{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0de5200-48a0-4b9c-8b14-65a4c504e9a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.ticker import FuncFormatter, MultipleLocator\n",
    "from matplotlib.ticker import FuncFormatter\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.api as sm  "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1dcab523-52ba-489b-8e70-4ceef984614a",
   "metadata": {},
   "source": [
    "# 1. Analysis 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55614eb6-10d5-4d69-8345-0cdfdf0a0134",
   "metadata": {},
   "source": [
    "## Figure 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5bfce87-c283-4f05-a1a6-7a26ea3003ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0) Paths\n",
    "# ----------------------------\n",
    "BASE_DIR = r\"E:\\main\"\n",
    "INPUT_XLSX = os.path.join(BASE_DIR, \"synthetic_monthly_framing_2010_2025.xlsx\")\n",
    "INPUT_CSV  = os.path.join(BASE_DIR, \"synthetic_monthly_framing_2010_2025.csv\")\n",
    "\n",
    "OUT_PNG = os.path.join(BASE_DIR, \"figure_1_timeseries.png\")\n",
    "OUT_PDF = os.path.join(BASE_DIR, \"figure_1_timeseries.pdf\")\n",
    "\n",
    "# ----------------------------\n",
    "# 1) Load data\n",
    "# ----------------------------\n",
    "if os.path.exists(INPUT_XLSX):\n",
    "    df = pd.read_excel(INPUT_XLSX)\n",
    "elif os.path.exists(INPUT_CSV):\n",
    "    df = pd.read_csv(INPUT_CSV, encoding=\"utf-8-sig\")\n",
    "else:\n",
    "    raise FileNotFoundError(\"Input file not found. Put the xlsx/csv into the BASE_DIR path.\")\n",
    "\n",
    "# Parse dates\n",
    "df[\"date\"] = pd.to_datetime(df[\"month\"] + \"-01\")  # month is 'YYYY-MM'\n",
    "df = df.sort_values(\"date\").reset_index(drop=True)\n",
    "\n",
    "# Columns (expected)\n",
    "g_nat_col = \"grievance_share_national\"\n",
    "g_loc_col = \"grievance_share_local\"\n",
    "d_col = \"difference_local_minus_national\"\n",
    "\n",
    "# Ensure difference exists\n",
    "if d_col not in df.columns:\n",
    "    df[d_col] = df[g_loc_col] - df[g_nat_col]\n",
    "\n",
    "# ----------------------------\n",
    "# 2) Optional smoothing (rolling mean)\n",
    "#    This helps visual interpretation without hiding volatility.\n",
    "# ----------------------------\n",
    "WINDOW = 6  # 6-month rolling mean (adjust to 3 or 12 if you prefer)\n",
    "df[\"g_nat_smooth\"] = df[g_nat_col].rolling(WINDOW, center=True, min_periods=1).mean()\n",
    "df[\"g_loc_smooth\"] = df[g_loc_col].rolling(WINDOW, center=True, min_periods=1).mean()\n",
    "df[\"d_smooth\"] = df[d_col].rolling(WINDOW, center=True, min_periods=1).mean()\n",
    "\n",
    "# ----------------------------\n",
    "# 3) Figure settings (high-detail)\n",
    "# ----------------------------\n",
    "plt.rcParams.update({\n",
    "    \"font.size\": 14,\n",
    "    \"axes.titlesize\": 16,\n",
    "    \"axes.labelsize\": 12,\n",
    "    \"xtick.labelsize\": 12,\n",
    "    \"ytick.labelsize\": 12,\n",
    "    \"legend.fontsize\": 12,\n",
    "    \"axes.linewidth\": 0.8,\n",
    "})\n",
    "\n",
    "# ----------------------------\n",
    "# 4) Create a 2-panel figure: Figure 1(a) and Figure 1(b)\n",
    "# ----------------------------\n",
    "fig, axs = plt.subplots(\n",
    "    2, 1,\n",
    "    figsize=(14, 8),      # wide, readable\n",
    "    sharex=True,\n",
    "    gridspec_kw={\"height_ratios\": [2.2, 1.3]}\n",
    ")\n",
    "\n",
    "ax1, ax2 = axs\n",
    "\n",
    "# ----- Panel (a): levels -----\n",
    "ax1.plot(df[\"date\"], df[g_nat_col], linewidth=1.0, alpha=0.35, label=\"National (monthly)\")\n",
    "ax1.plot(df[\"date\"], df[g_loc_col], linewidth=1.0, alpha=0.35, label=\"Local (monthly)\")\n",
    "\n",
    "# Smoothed overlays (same default color; no custom colors)\n",
    "ax1.plot(df[\"date\"], df[\"g_nat_smooth\"], linewidth=2.0, label=f\"National ({WINDOW}-month mean)\")\n",
    "ax1.plot(df[\"date\"], df[\"g_loc_smooth\"], linewidth=2.0, label=f\"Local ({WINDOW}-month mean)\")\n",
    "\n",
    "ax1.set_ylabel(\"Grievance share\")\n",
    "ax1.set_ylim(0, 1)\n",
    "\n",
    "# Light grid for readability\n",
    "ax1.grid(True, linestyle=\"--\", linewidth=0.6, alpha=0.5)\n",
    "\n",
    "ax1.set_title(\"Figure 3(a). Monthly grievance-based framing share (national vs local)\", pad=10)\n",
    "\n",
    "# Event markers (if present)\n",
    "if \"event_flag\" in df.columns:\n",
    "    event_rows = df[df[\"event_flag\"] == 1]\n",
    "    for _, r in event_rows.iterrows():\n",
    "        ax1.axvline(r[\"date\"], linewidth=1.0, alpha=0.25)\n",
    "        # label only if event_label exists and is non-empty\n",
    "        if \"event_label\" in df.columns and isinstance(r.get(\"event_label\", \"\"), str) and r[\"event_label\"].strip():\n",
    "            ax1.text(\n",
    "                r[\"date\"], 0.98,\n",
    "                r[\"event_label\"],\n",
    "                rotation=90,\n",
    "                va=\"top\",\n",
    "                ha=\"right\",\n",
    "                fontsize=8,\n",
    "                alpha=0.8\n",
    "            )\n",
    "\n",
    "ax1.legend(loc=\"upper left\", frameon=False, ncol=2)\n",
    "\n",
    "# ----- Panel (b): difference -----\n",
    "ax2.plot(df[\"date\"], df[d_col], linewidth=1.0, alpha=0.35, label=r\"$D_t$ (monthly)\")\n",
    "ax2.plot(df[\"date\"], df[\"d_smooth\"], linewidth=2.0, label=f\"$D_t$ ({WINDOW}-month mean)\")\n",
    "\n",
    "# Zero reference line\n",
    "ax2.axhline(0, linewidth=1.0, alpha=0.5)\n",
    "\n",
    "ax2.set_ylabel(r\"$D_t$ = Local − National\")\n",
    "ax2.set_ylim(-0.6, 0.8)  # adjust if needed after seeing data\n",
    "ax2.grid(True, linestyle=\"--\", linewidth=0.6, alpha=0.5)\n",
    "ax2.set_title(\"Figure 3(b). Monthly difference in shares (local minus national)\", pad=10)\n",
    "\n",
    "ax2.legend(loc=\"upper left\", frameon=False)\n",
    "\n",
    "# ----------------------------\n",
    "# 5) X-axis formatting: show years cleanly\n",
    "# ----------------------------\n",
    "# Major ticks: every year (January)\n",
    "years = pd.date_range(df[\"date\"].min(), df[\"date\"].max(), freq=\"YS\")\n",
    "ax2.set_xticks(years)\n",
    "\n",
    "# Label: YYYY\n",
    "ax2.set_xticklabels([d.strftime(\"%Y\") for d in years], rotation=0)\n",
    "\n",
    "# Minor ticks: every quarter (optional)\n",
    "quarters = pd.date_range(df[\"date\"].min(), df[\"date\"].max(), freq=\"QS\")\n",
    "ax2.set_xticks(quarters, minor=True)\n",
    "\n",
    "# Add minor grid for quarters (subtle)\n",
    "ax2.grid(True, which=\"minor\", axis=\"x\", linestyle=\":\", linewidth=0.4, alpha=0.35)\n",
    "\n",
    "# ----------------------------\n",
    "# 6) Y-axis tick formatting\n",
    "# ----------------------------\n",
    "# (a) share axis: show as 0.0–1.0 with 0.1 ticks\n",
    "ax1.yaxis.set_major_locator(MultipleLocator(0.1))\n",
    "ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f\"{x:.1f}\"))\n",
    "\n",
    "# (b) difference axis: show 2 decimals\n",
    "ax2.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f\"{x:.2f}\"))\n",
    "\n",
    "# ----------------------------\n",
    "# 7) Layout and save (high quality)\n",
    "# ----------------------------\n",
    "fig.tight_layout()\n",
    "\n",
    "# Save\n",
    "fig.savefig(OUT_PNG, dpi=600)\n",
    "fig.savefig(OUT_PDF)  # vector format for editing\n",
    "\n",
    "print(\"Saved:\")\n",
    "print(\"PNG:\", OUT_PNG)\n",
    "print(\"PDF:\", OUT_PDF)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d84065e-474b-46a2-b489-a35de826bb81",
   "metadata": {},
   "source": [
    "## Figure 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25aee725-8fbf-4b7e-a603-52c02d1e651a",
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE_DIR = r\"E:\\불평등 연구\\데이터\\67_원자력_지역불평등_미디어\\main\"\n",
    "DATA_FILE = os.path.join(BASE_DIR, \"synthetic_monthly_framing_2010_2025.xlsx\")\n",
    "\n",
    "OUT_PNG = os.path.join(BASE_DIR, \"figure_2_boxplots.png\")\n",
    "OUT_PDF = os.path.join(BASE_DIR, \"figure_2_boxplots.pdf\")\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 1) Load data\n",
    "# -------------------------------------------------\n",
    "df = pd.read_excel(DATA_FILE)\n",
    "\n",
    "df[\"date\"] = pd.to_datetime(df[\"month\"] + \"-01\")\n",
    "df[\"year\"] = df[\"date\"].dt.year\n",
    "\n",
    "g_nat = \"grievance_share_national\"\n",
    "g_loc = \"grievance_share_local\"\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 2) Five-year window variable\n",
    "# -------------------------------------------------\n",
    "min_year = int(df[\"year\"].min())\n",
    "max_year = int(df[\"year\"].max())\n",
    "\n",
    "def five_year_window(y, max_year=max_year):\n",
    "    start = (y // 5) * 5\n",
    "    end = min(start + 4, max_year)\n",
    "    if start == end:\n",
    "        return f\"{start}\"            # ✅ show \"2025\" instead of \"2025-2025\"\n",
    "    return f\"{start}-{end}\"\n",
    "\n",
    "df[\"five_year\"] = df[\"year\"].apply(five_year_window)\n",
    "windows = sorted(df[\"five_year\"].unique(), key=lambda s: int(s.split(\"-\")[0]))\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 3) Plot setup (Cities-style, clean)\n",
    "# -------------------------------------------------\n",
    "plt.rcParams.update({\n",
    "    \"font.size\": 14,\n",
    "    \"axes.titlesize\": 17,\n",
    "    \"axes.labelsize\": 14,\n",
    "    \"xtick.labelsize\": 14,\n",
    "    \"ytick.labelsize\": 13,\n",
    "    \"axes.linewidth\": 0.8\n",
    "})\n",
    "\n",
    "fig, axes = plt.subplots(\n",
    "    1, 2,\n",
    "    figsize=(15, 6),\n",
    "    sharey=True\n",
    ")\n",
    "\n",
    "ax1, ax2 = axes\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 4) Figure 2(a): Overall distribution\n",
    "# -------------------------------------------------\n",
    "ax1.boxplot(\n",
    "    [df[g_nat].dropna(), df[g_loc].dropna()],\n",
    "    widths=0.6,\n",
    "    showfliers=True\n",
    ")\n",
    "\n",
    "ax1.set_xticks([1, 2])\n",
    "ax1.set_xticklabels([\"National\", \"Local\"])\n",
    "ax1.set_ylabel(\"Grievance share\")\n",
    "ax1.set_ylim(0, 1)\n",
    "ax1.set_title(\"Figure 4(a). Overall distribution (2010–2025)\", pad=10)\n",
    "ax1.grid(True, axis=\"y\", linestyle=\"--\", linewidth=0.6, alpha=0.5)\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 5) Figure 2(b): Five-year window distributions\n",
    "# -------------------------------------------------\n",
    "box_data = []\n",
    "box_labels = []\n",
    "\n",
    "for w in windows:\n",
    "    sub = df[df[\"five_year\"] == w]\n",
    "    box_data.append(sub[g_nat].dropna())\n",
    "    box_data.append(sub[g_loc].dropna())\n",
    "    box_labels.append(f\"{w}\\nNat\")\n",
    "    box_labels.append(f\"{w}\\nLoc\")\n",
    "\n",
    "ax2.boxplot(\n",
    "    box_data,\n",
    "    widths=0.6,\n",
    "    showfliers=True\n",
    ")\n",
    "\n",
    "ax2.set_xticks(range(1, len(box_labels) + 1))\n",
    "ax2.set_xticklabels(box_labels, rotation=90, ha=\"center\")\n",
    "ax2.set_title(\"Figure 4(b). Distribution by five-year windows\", pad=10)\n",
    "ax2.grid(True, axis=\"y\", linestyle=\"--\", linewidth=0.6, alpha=0.5)\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 6) Axis formatting\n",
    "# -------------------------------------------------\n",
    "ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f\"{x:.1f}\"))\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 7) Save and show\n",
    "# -------------------------------------------------\n",
    "fig.tight_layout()\n",
    "fig.savefig(OUT_PNG, dpi=600)\n",
    "fig.savefig(OUT_PDF)\n",
    "\n",
    "plt.show()\n",
    "\n",
    "print(\"Saved files:\")\n",
    "print(OUT_PNG)\n",
    "print(OUT_PDF)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c00f9e7-2aef-449e-8c20-dcb3f29fdbbb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "b7a97442-d42a-4253-85f7-e4215624767f",
   "metadata": {},
   "source": [
    "# 2. Analysis 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af30dbc9-3c1e-4c28-9a07-4fc0b05c9490",
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE_DIR = r\"E:\\\\main\"\n",
    "DATA_FILE = os.path.join(BASE_DIR, \"synthetic_monthly_framing_2010_2025.xlsx\")\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 1) Load data\n",
    "# -------------------------------------------------\n",
    "df = pd.read_excel(DATA_FILE)\n",
    "df[\"date\"] = pd.to_datetime(df[\"month\"] + \"-01\")\n",
    "df[\"month_id\"] = df[\"date\"].dt.to_period(\"M\").astype(str)\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 2) Reshape to group-month panel\n",
    "# -------------------------------------------------\n",
    "df_long = pd.DataFrame({\n",
    "    \"month\": df[\"month_id\"].tolist() * 2,\n",
    "    \"group\": [\"national\"] * len(df) + [\"local\"] * len(df),\n",
    "    \"G\": pd.concat([\n",
    "        df[\"grievance_share_national\"],\n",
    "        df[\"grievance_share_local\"]\n",
    "    ], ignore_index=True)\n",
    "})\n",
    "\n",
    "df_long[\"Local\"] = (df_long[\"group\"] == \"local\").astype(int)\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 3) Main model: Monthly FE (OLS)\n",
    "# -------------------------------------------------\n",
    "model_fe = smf.ols(\n",
    "    \"G ~ Local + C(month)\",\n",
    "    data=df_long\n",
    ").fit(cov_type=\"HC1\")\n",
    "\n",
    "print(\"\\n=== Results 2(a): Monthly Fixed-Effects Model (OLS) ===\")\n",
    "print(model_fe.summary())\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 4) Fractional logit (robustness)\n",
    "# -------------------------------------------------\n",
    "model_frac = smf.glm(\n",
    "    \"G ~ Local + C(month)\",\n",
    "    data=df_long,\n",
    "    family=sm.families.Binomial()\n",
    ").fit(cov_type=\"HC1\")\n",
    "\n",
    "print(\"\\n=== Results 2(b): Fractional Logit (Robustness) ===\")\n",
    "print(model_frac.summary())\n",
    "\n",
    "\n",
    "# -------------------------------------------------\n",
    "# 6) Save regression tables (optional)\n",
    "# -------------------------------------------------\n",
    "out_txt = os.path.join(BASE_DIR, \"results_regression_tables.txt\")\n",
    "with open(out_txt, \"w\", encoding=\"utf-8\") as f:\n",
    "    f.write(\"Results 2(a): Monthly FE (OLS)\\n\")\n",
    "    f.write(model_fe.summary().as_text())\n",
    "    f.write(\"\\n\\nResults 2(b): Fractional Logit\\n\")\n",
    "    f.write(model_frac.summary().as_text())\n",
    "\n",
    "print(\"\\nSaved regression output to:\", out_txt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6be80d9a-757f-4ee3-b5a5-96f4cb02c7eb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
